// This proto describes the format of the output profile file from
// the tf.data stats tool.
syntax = "proto3";

package tensorflow.profiler;

// Statistics recorded for a single iterator. Time-valued fields carry a `_ps`
// suffix and are expressed in ps.
message IteratorStat {
  // Id of the iterator.
  int64 id = 1;
  // Start time of the iterator's GetNext in ps.
  int64 start_time_ps = 2;
  // Duration of the iterator's GetNext in ps.
  int64 duration_ps = 3;
  // Self time of the iterator's GetNext in ps. It takes async iterators into
  // account: it is calculated by subtracting the time overlapped with its
  // child iterator's duration from the iterator's duration.
  int64 self_time_ps = 4;
  // Whether it is blocking the root iterator. An async iterator's child
  // iterator may not block its parent iterator if it is executed in advance and
  // does not overlap with the parent iterator.
  bool is_blocking = 5;
  // The number of times this iterator is called. For example, a batch
  // iterator's child iterator may be called multiple times.
  int64 num_calls = 6;
}

// Static (non-timing) description of a single iterator.
//
// NOTE: field numbers do not follow declaration order (`long_name` is 6 but is
// declared next to `name`). The numbers identify the fields on the wire, so do
// not renumber them to match the declaration order.
message IteratorMetadata {
  // Id of the iterator.
  int64 id = 1;
  // Id of the parent iterator.
  int64 parent_id = 2;
  // Name of the iterator.
  string name = 3;
  // Long name of the iterator.
  string long_name = 6;
  // Whether it is an async iterator.
  bool is_async = 4;
  // Parameters of the iterator (e.g., num_parallel_calls). Presumably keyed by
  // parameter name with the value rendered as a string -- confirm against the
  // code that populates this map.
  map<string, string> params = 5;
}

// Stats for an input pipeline: the bottleneck iterator and the stats of each
// iterator. InputPipelineStats.stats holds these as "stats per call", so one
// InputPipelineStat appears to describe one call of the pipeline.
message InputPipelineStat {
  // Id of the blocking iterator with the longest self time.
  int64 bottleneck_iterator_id = 2;
  // Latency of the bottleneck iterator in ps.
  int64 bottleneck_iterator_latency_ps = 3;
  // Stats per iterator. The key is presumably the iterator id (matching
  // IteratorStat.id) -- confirm against the code that populates this map.
  map<int64, IteratorStat> iterator_stats = 1;
}

// Metadata for input pipeline: its id, type and name.
message InputPipelineMetadata {
  // The distribution strategy creates one "host" input pipeline which actually
  // runs tf.data user code. Also, it creates a "device" input pipeline per
  // device (e.g., TensorCore) which takes an element from the host input
  // pipeline and transfers it to the device.
  enum InputPipelineType {
    // Host input pipeline. As the zero value, this is also what an unset
    // `type` field reads as in proto3.
    HOST = 0;
    // Device input pipeline.
    DEVICE = 1;
  }
  // Id of the input pipeline which is set to the id of its root iterator.
  int64 id = 1;
  // Type of the input pipeline (host or device).
  InputPipelineType type = 2;
  // Name of the input pipeline.
  string name = 4;
  // Field number 3 is reserved so that it cannot be assigned to a new field
  // (presumably it belonged to a field that has since been removed).
  reserved 3;
}

// Collection of metadata and stats of input pipeline: the pipeline's metadata,
// aggregate latency figures, and the stats recorded for each call.
message InputPipelineStats {
  // Metadata of the input pipeline.
  InputPipelineMetadata metadata = 1;
  // Average latency (i.e., the root iterator's latency) of the input pipeline,
  // in ps.
  int64 avg_latency_ps = 3;
  // Minimum latency of the input pipeline, in ps.
  int64 min_latency_ps = 4;
  // Maximum latency of the input pipeline, in ps.
  int64 max_latency_ps = 5;
  // The number of times this input pipeline was slower than 50 us.
  int64 num_slow_calls = 6;
  // Stats per call sorted by the root iterator's duration. The sort direction
  // is not specified in this file.
  repeated InputPipelineStat stats = 2;
}

// Collection of stats of tf.data input pipelines within a host.
message TfDataStats {
  // Metadata per iterator. The key is presumably the iterator id (matching
  // IteratorMetadata.id) -- confirm against the code that populates this map.
  map<int64, IteratorMetadata> iterator_metadata = 2;
  // Stats per input pipeline. The key is presumably the input pipeline id
  // (matching InputPipelineMetadata.id) -- confirm against the code that
  // populates this map.
  map<int64, InputPipelineStats> input_pipelines = 1;
}

// One bottleneck analysis entry: names a host, an input pipeline and that
// pipeline's bottleneck iterator, together with a suggestion to resolve the
// bottleneck.
//
// NOTE: field numbers do not follow declaration order (`iterator_latency_ps`
// is 7 and `suggestion` is 6). The numbers identify the fields on the wire, so
// do not renumber them to match the declaration order.
message TfDataBottleneckAnalysis {
  // Host name.
  string host = 1;
  // Input pipeline name.
  string input_pipeline = 2;
  // Maximum latency of the input pipeline, in ps.
  int64 max_latency_ps = 3;
  // Name of the bottleneck iterator.
  string iterator_name = 4;
  // Long name of the bottleneck iterator.
  string iterator_long_name = 5;
  // Latency of the bottleneck iterator, in ps.
  int64 iterator_latency_ps = 7;
  // Suggestion to resolve the bottleneck.
  string suggestion = 6;
}

// TfDataStats of all hosts, together with the bottleneck analysis results and
// a summary of the analysis.
message CombinedTfDataStats {
  // Whether it is input bound.
  bool is_input_bound = 3;
  // Summary of the analysis.
  string summary = 4;
  // Bottleneck analysis result.
  repeated TfDataBottleneckAnalysis bottleneck_analysis = 1;
  // TfDataStats per host. The key is presumably the host name (cf.
  // TfDataBottleneckAnalysis.host) -- confirm against the code that populates
  // this map.
  map<string, TfDataStats> tf_data_stats = 2;
}
